source("~/Documents/PhD/GlobalDA/code/2_inference_TMB/helper_TMB.R")
source("~/Documents/PhD/CDA_in_Cancer/code/functions/meretricious/pretty_plots/prettySignatures.R")
library(gtools)
## Load the PCAWG SigProfiler SBS probabilities table (as named by the file)
## and derive (i) a 0/1 presence matrix and (ii) the cancer type of each row.
sigs_pcawg_paper <- read.table(
  "../../data/restricted/pcawg/SigProfilier_PCAWG_WGS_probabilities_SBS.csv",
  sep = ",", header = TRUE
)
# rownames(sigs_pcawg_paper) <- sigs_pcawg_paper[,1]
# Drop columns 1, 3 and 4 by position; Cancer.Type is still needed below.
sigs_pcawg_paper[, c(1, 3, 4)] <- NULL
# pheatmap::pheatmap(sigs_pcawg_paper)
# Cancer type of every row, read directly from the column rather than being
# recovered by stripping the make.unique() suffix from the row names (which
# would truncate any label that itself contains a dot).
ct <- as.character(sigs_pcawg_paper$Cancer.Type)
rownames(sigs_pcawg_paper) <- make.unique(ct)
sigs_pcawg_paper$Cancer.Type <- NULL
# Numeric 0/1 matrix: 1 where the entry is strictly positive. apply() with
# as.numeric drops the row names, so rows are identified through `ct`, which
# is in the same row order.
sigs_pcawg_paper_bool <- apply(sigs_pcawg_paper > 0, 2, as.numeric)
# Number of rows per cancer type.
table(ct)
## ct
## Biliary-AdenoCA Bladder-TCC Bone-Benign Bone-Epith
## 3360 2208 1536 1056
## Bone-Osteosarc Breast-AdenoCA Breast-DCIS Breast-LobularCA
## 3648 19008 288 1248
## Cervix-AdenoCA Cervix-SCC CNS-GBM CNS-Medullo
## 192 1728 3936 14016
## CNS-Oligo CNS-PiloAstro ColoRect-AdenoCA Eso-AdenoCA
## 1728 8544 5760 9408
## Head-SCC Kidney-ChRCC Kidney-RCC Liver-HCC
## 5472 4320 13824 31296
## Lung-AdenoCA Lung-SCC Lymph-BNHL Lymph-CLL
## 3648 4608 10272 9120
## Myeloid-AML Myeloid-MDS Myeloid-MPN Ovary-AdenoCA
## 1056 384 5376 10848
## Panc-AdenoCA Panc-Endocrine Prost-AdenoCA Skin-Melanoma
## 23136 8160 27456 10272
## SoftTissue-Leiomyo SoftTissue-Liposarc Stomach-AdenoCA Thy-AdenoCA
## 1440 1824 7200 4608
## Uterus-AdenoCA
## 4896
## Presence/absence heatmap for the rows of a single cancer type.
pheatmap::pheatmap(
  sigs_pcawg_paper_bool[ct %in% "Biliary-AdenoCA", , drop = FALSE]
)

## get active signatures in each cancer type
# A signature is active in a cancer type when at least one row of that type
# has a non-zero entry for it. Rows are selected by exact label match (not by
# regex, which would also pick up labels that merely contain `ct_it`).
# `simplify = FALSE` guarantees a list named by cancer type even if every
# cancer type happens to have the same number of active signatures.
active_per_ct <- sapply(unique(ct), function(ct_it) {
  rows_in_ct <- ct %in% ct_it
  n_rows_active <- colSums(sigs_pcawg_paper_bool[rows_in_ct, , drop = FALSE])
  names(which(n_rows_active > 0))
}, simplify = FALSE)
# The list names are the raw cancer-type labels, which use hyphens, so
# `active_per_ct$Eso_AdenoCA` (underscore) returned NULL. Index by the exact
# label instead.
active_per_ct[["Eso-AdenoCA"]]
## Load the per-sample signature table and draw one barplot per cancer type,
## restricted to the signatures found active for that type in `active_per_ct`.
sigs_pcawg_paper_2 <- read.table(
  "../../data/restricted/pcawg/PCAWG_sigProfiler_SBS_signatures_in_samples.csv",
  sep = ",", header = TRUE
)
# Coerce to character so the labels can index `active_per_ct` by name even if
# the column was read as a factor.
ct2 <- as.character(sigs_pcawg_paper_2$Cancer.Types)
rownames(sigs_pcawg_paper_2) <- paste0(ct2, "-", sigs_pcawg_paper_2$Sample.Names)
# Drop the first three columns by position (they include Cancer.Types and
# Sample.Names); the remaining columns are indexed by signature name below.
sigs_pcawg_paper_2 <- sigs_pcawg_paper_2[, -c(1:3)]
lapply(sort(unique(ct2)), function(ct_it) {
  # Exact label match for the rows; drop = FALSE keeps a one-column result as
  # a data frame so the signature name survives the conversion to a matrix.
  exposures_ct <- sigs_pcawg_paper_2[ct2 %in% ct_it,
                                     active_per_ct[[ct_it]],
                                     drop = FALSE]
  createBarplot(normalise_rw(as(exposures_ct, "matrix"))) +
    ggtitle(ct_it)
})
## Creating plot... it might take some time if the data are large. Number of samples: 35
## Creating plot... it might take some time if the data are large. Number of samples: 23
## Creating plot... it might take some time if the data are large. Number of samples: 16
## Creating plot... it might take some time if the data are large. Number of samples: 11
## Creating plot... it might take some time if the data are large. Number of samples: 38
## Creating plot... it might take some time if the data are large. Number of samples: 198
## Creating plot... it might take some time if the data are large. Number of samples: 3
## Creating plot... it might take some time if the data are large. Number of samples: 13
## Creating plot... it might take some time if the data are large. Number of samples: 2
## Creating plot... it might take some time if the data are large. Number of samples: 18
## Creating plot... it might take some time if the data are large. Number of samples: 41
## Creating plot... it might take some time if the data are large. Number of samples: 146
## Creating plot... it might take some time if the data are large. Number of samples: 18
## Creating plot... it might take some time if the data are large. Number of samples: 89
## Creating plot... it might take some time if the data are large. Number of samples: 60
## Creating plot... it might take some time if the data are large. Number of samples: 98
## Creating plot... it might take some time if the data are large. Number of samples: 57
## Creating plot... it might take some time if the data are large. Number of samples: 45
## Creating plot... it might take some time if the data are large. Number of samples: 144
## Creating plot... it might take some time if the data are large. Number of samples: 326
## Creating plot... it might take some time if the data are large. Number of samples: 38
## Creating plot... it might take some time if the data are large. Number of samples: 48
## Creating plot... it might take some time if the data are large. Number of samples: 107
## Creating plot... it might take some time if the data are large. Number of samples: 95
## Creating plot... it might take some time if the data are large. Number of samples: 11
## Creating plot... it might take some time if the data are large. Number of samples: 4
## Creating plot... it might take some time if the data are large. Number of samples: 56
## Creating plot... it might take some time if the data are large. Number of samples: 113
## Creating plot... it might take some time if the data are large. Number of samples: 241
## Creating plot... it might take some time if the data are large. Number of samples: 85
## Creating plot... it might take some time if the data are large. Number of samples: 286
## Creating plot... it might take some time if the data are large. Number of samples: 107
## Creating plot... it might take some time if the data are large. Number of samples: 15
## Creating plot... it might take some time if the data are large. Number of samples: 19
## Creating plot... it might take some time if the data are large. Number of samples: 75
## Creating plot... it might take some time if the data are large. Number of samples: 48
## Creating plot... it might take some time if the data are large. Number of samples: 51
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

##
## [[9]]

##
## [[10]]

##
## [[11]]

##
## [[12]]

##
## [[13]]

##
## [[14]]

##
## [[15]]

##
## [[16]]

##
## [[17]]

##
## [[18]]

##
## [[19]]

##
## [[20]]

##
## [[21]]

##
## [[22]]

##
## [[23]]

##
## [[24]]

##
## [[25]]

##
## [[26]]

##
## [[27]]

##
## [[28]]

##
## [[29]]

##
## [[30]]

##
## [[31]]

##
## [[32]]

##
## [[33]]

##
## [[34]]

##
## [[35]]

##
## [[36]]

##
## [[37]]

# active_sigs <- sapply(unique(ct), function(ct_it){
# gsub("_.*", "", (names(which(colSums(sigs_pcawg_paper_bool[grepl(ct_it, ct),]) > 0))))
# })
## Build a cancer-type x signature presence/absence table and write it to disk.
# All signatures active in at least one cancer type, sorted with
# gtools::mixedsort (embedded numbers are ordered numerically).
uniq_sigs <- gtools::mixedsort(unique(unlist(active_per_ct)))
# One 0/1 row per cancer type: 1 when the signature is active in that type.
active_sigs_tab <- t(sapply(active_per_ct, function(sigs_in_ct) {
  as.numeric(is.element(uniq_sigs, sigs_in_ct))
}))
colnames(active_sigs_tab) <- uniq_sigs
# Row labels: underscores replaced by hyphens, then upper-cased.
rownames(active_sigs_tab) <- toupper(gsub("_", "-", rownames(active_sigs_tab)))
# Prepend the row labels as an explicit `id2` column (this turns the matrix
# into a character matrix, which is fine for writing).
active_sigs_tab <- cbind(id2 = rownames(active_sigs_tab), active_sigs_tab)
# col.names = NA writes an empty header cell above the row-name column.
write.table(
  active_sigs_tab,
  "../../data/cosmic/active_signatures_PCAWGpaper.txt",
  quote = FALSE, sep = "\t", col.names = NA
)